In [2]:
# Traffic for all federal domains; the file name indicates a 30-day window.
# Retrieved 7 May 2017.
import pandas as pd
may = pd.read_csv('../datasets/may-7-domains-30-days.csv')

In [3]:
# Earlier snapshots to compare against; the file names suggest 29 January and 29 April.
# NOTE(review): these names mix '-' and '_' separators -- confirm they match the files on disk.
jan = pd.read_csv('../datasets/jan-29-domains-30-days.csv')
apr = pd.read_csv('../datasets/apr_29_domains_30_days.csv')

In [4]:
# First ten rows of the May snapshot (rows appear to be ordered by visits, descending)
may.head(10)


Out[4]:
domain visits pageviews users pageviews_per_session avg_session_duration exits
0 tools.usps.com 122923586 210285721 43282106 1.710703 124.577523 124318508
1 irs.gov 64288281 159935132 29065266 2.487781 134.754316 64279223
2 forecast.weather.gov 45964078 72227972 10032075 1.571400 112.142175 47755903
3 medlineplus.gov 45407427 62890065 32239660 1.385017 87.004353 45395075
4 usps.com 27401076 39044640 13940162 1.424931 80.276056 23217134
5 cdc.gov 24131162 48243384 16897989 1.999215 125.150997 24066933
6 weather.gov 18265737 40185940 5667797 2.200072 123.003969 16426153
7 usajobs.gov 16848582 136554955 5188550 8.104834 280.776700 16972099
8 ssa.gov 14570100 42347492 8623154 2.906465 179.368881 14551161
9 nps.gov 14267071 44258716 9434251 3.102159 227.030067 14225075

In [5]:
# First ten rows of the January snapshot, for comparison with May
jan.head(10)


Out[5]:
domain visits pageviews users pageviews_per_session avg_session_duration exits
0 tools.usps.com 136655792 224797512 46759396 1.644991 115.951277 138858557
1 forecast.weather.gov 63880179 105332133 14367531 1.648902 104.578936 65740528
2 irs.gov 46463674 128178957 23443184 2.758692 155.731470 46411247
3 medlineplus.gov 45056329 62221347 32173145 1.380968 82.273208 45027658
4 usps.com 30359489 41651833 15463589 1.371954 78.620491 25351865
5 weather.gov 22945944 52648454 7695341 2.294456 126.729109 21094606
6 cdc.gov 22759991 43954537 16586680 1.931219 105.622051 22768183
7 usajobs.gov 21969486 93979170 6244580 4.277714 243.483729 22214420
8 whitehouse.gov 18694420 59996465 13785916 3.209325 119.480785 18892661
9 ssa.gov 15968202 45371712 9357450 2.841379 200.230642 15937073

In [6]:
# Last five rows of the January snapshot: the least-visited domains in the file
jan.tail()


Out[6]:
domain visits pageviews users pageviews_per_session avg_session_duration exits
1910 wizard.gov 1638 1638 1638 1.000000 0.000000 1638
1911 wrcc.osmre.gov 1638 4096 1638 2.500611 21.504884 1638
1912 wwwcf.nlm.nih.gov 1638 5734 819 3.500611 0.000000 2458
1913 wy-mt.water.usgs.gov 1638 1638 819 1.000000 2.500611 1638
1914 zh-reg.usps.com 1638 4096 2458 2.500611 7.001221 2458

In [7]:
# First ten rows of the April snapshot
apr.head(10)


Out[7]:
domain visits pageviews users pageviews_per_session avg_session_duration exits
0 tools.usps.com 130000962 221116043 44761348 1.700880 120.329563 131161140
1 irs.gov 77180232 191648876 33978218 2.483134 136.144062 77072604
2 forecast.weather.gov 48962227 74347160 10833978 1.518460 87.400830 50551222
3 medlineplus.gov 47886795 67108971 33700249 1.401409 91.188104 47883405
4 usps.com 29474794 41907937 14609430 1.421823 77.300260 25391713
5 cdc.gov 25089169 51052073 17549282 2.034825 120.615435 25014592
6 weather.gov 18732342 43888460 5900891 2.342924 130.353963 17087415
7 usajobs.gov 16577241 110669463 5107665 6.675988 274.754568 16601818
8 ssa.gov 15706896 44962197 9203458 2.862577 183.567280 15682319
9 nps.gov 14445870 43750324 9724648 3.028570 221.653168 14395022

In [8]:
def _rank_table(traffic):
    """Return ``{domain: [rank, visits, pageviews]}`` for one traffic snapshot.

    ``rank`` is the 1-based position of the row in ``traffic``. Values are
    paired positionally with ``zip`` instead of being looked up with
    ``traffic.loc[i, ...]``, so the result does not depend on the frame
    carrying the default 0..n-1 index. If a domain occurs more than once,
    the values of its last row are kept.
    """
    positions = range(1, len(traffic) + 1)
    rows = zip(positions, traffic['domain'], traffic['visits'], traffic['pageviews'])
    return {domain: [rank, visits, pageviews]
            for rank, domain, visits, pageviews in rows}


def _rank_frame(rank_table, month):
    """Build a domain-indexed frame with ``<month>_rank/_visits/_pageviews`` columns."""
    frame = pd.DataFrame.from_dict(rank_table, orient='index')
    frame.columns = [month + '_rank', month + '_visits', month + '_pageviews']
    return frame


# One lookup table per snapshot, keyed by domain
jan_rank = _rank_table(jan)
apr_rank = _rank_table(apr)
may_rank = _rank_table(may)

# The same data as frames, with month-prefixed columns so they can be joined side by side
jan_df = _rank_frame(jan_rank, 'jan')
apr_df = _rank_frame(apr_rank, 'apr')
may_df = _rank_frame(may_rank, 'may')

In [9]:
# Spot-check the reshaped April frame: domain index with apr_rank / apr_visits / apr_pageviews
apr_df.head()


Out[9]:
apr_rank apr_visits apr_pageviews
search.stopbullying.gov 1145 9322 11864
fpds.gov 370 116950 587292
remm.nlm.gov 803 22034 32204
sdr.gov 2370 847 847
shreveport.va.gov 1194 8475 42373

In [10]:
# Right join on the domain index: every April domain is kept, and the January
# columns are NaN where a domain has no January row. Domains that appear only
# in January are dropped.
usa = jan_df.join(apr_df,how='right')

In [ ]:


In [11]:
# Spot-check the join; January columns are NaN for domains present only in April
usa.head()


Out[11]:
jan_rank jan_visits jan_pageviews apr_rank apr_visits apr_pageviews
search.stopbullying.gov 1520.0 3277.0 4915.0 1145 9322 11864
fpds.gov 326.0 121238.0 434983.0 370 116950 587292
remm.nlm.gov NaN NaN NaN 803 22034 32204
sdr.gov NaN NaN NaN 2370 847 847
shreveport.va.gov 1339.0 4915.0 6553.0 1194 8475 42373

In [12]:
# Change from January to April for each domain. A positive rank_diff means the
# domain climbed the ranking (its April rank number is smaller than January's).
usa['rank_diff'] = usa['jan_rank'].sub(usa['apr_rank'])
usa['page_diff'] = usa['apr_pageviews'].sub(usa['jan_pageviews'])
usa['visit_diff'] = usa['apr_visits'].sub(usa['jan_visits'])
# Relative change as a fraction of the January value (0.5 means +50%)
usa['pct_page_diff'] = usa['apr_pageviews'].div(usa['jan_pageviews']).sub(1)
usa['pct_visit_diff'] = usa['apr_visits'].div(usa['jan_visits']).sub(1)

Page differences


In [13]:
# Largest absolute increases in pageviews, January -> April
usa.sort_values('page_diff', ascending=False).head(n=20)


Out[13]:
jan_rank jan_visits jan_pageviews apr_rank apr_visits apr_pageviews rank_diff page_diff visit_diff pct_page_diff pct_visit_diff
irs.gov 3.0 46463674.0 128178957.0 2 77180232 191648876 1.0 63469919.0 30716558.0 0.495166 0.661088
usajobs.gov 8.0 21969486.0 93979170.0 8 16577241 110669463 0.0 16690293.0 -5392245.0 0.177596 -0.245442
m.usps.com 17.0 7157142.0 18259437.0 13 12847553 32994312 4.0 14734875.0 5690411.0 0.806973 0.795068
sec.gov 50.0 2356770.0 20605558.0 26 4327151 34585849 24.0 13980291.0 1970381.0 0.678472 0.836051
find.irs.gov 35.0 3740359.0 9482784.0 15 11440763 21933213 20.0 12450429.0 7700404.0 1.312951 2.058734
spc.noaa.gov 94.0 1139474.0 6684478.0 39 3047480 18601833 55.0 11917355.0 1908006.0 1.782840 1.674462
e-verify.uscis.gov 126.0 719237.0 16208220.0 100 977973 25777309 26.0 9569089.0 258736.0 0.590385 0.359737
gsaauctions.gov 136.0 614382.0 14215984.0 129 765260 22218808 7.0 8002824.0 150878.0 0.562945 0.245577
surveys.nces.ed.gov 1532.0 3277.0 22937.0 275 191527 7600904 1257.0 7577967.0 188250.0 330.381785 57.445835
cdc.gov 7.0 22759991.0 43954537.0 6 25089169 51052073 1.0 7097536.0 2329178.0 0.161474 0.102337
egov.uscis.gov 24.0 5290240.0 23530017.0 19 6756830 29028181 5.0 5498164.0 1466590.0 0.233666 0.277226
usastaffing.opm.gov 302.0 139260.0 9735909.0 240 247459 14631464 62.0 4895555.0 108199.0 0.502835 0.776957
medlineplus.gov 4.0 45056329.0 62221347.0 4 47886795 67108971 0.0 4887624.0 2830466.0 0.078552 0.062821
uspto.gov 92.0 1144389.0 2767996.0 42 2912733 7262766 50.0 4494770.0 1768344.0 1.623835 1.545230
donotcall.gov 78.0 1394238.0 7182537.0 58 2292390 11531441 20.0 4348904.0 898152.0 0.605483 0.644188
fbo.gov 61.0 2006982.0 12423626.0 71 1892387 16740802 -10.0 4317176.0 -114595.0 0.347497 -0.057098
fs.usda.gov 99.0 1055918.0 3131711.0 59 2283915 7303444 40.0 4171733.0 1227997.0 1.332094 1.162966
nps.gov 13.0 11908364.0 39871763.0 10 14445870 43750324 3.0 3878561.0 2537506.0 0.097276 0.213086
passportstatus.state.gov 151.0 505432.0 4297398.0 106 928820 7563615 45.0 3266217.0 423388.0 0.760045 0.837675
fda.gov 22.0 5587601.0 11788765.0 18 7209375 14822144 4.0 3033379.0 1621774.0 0.257311 0.290245

In [14]:
# Largest absolute drops in pageviews, January -> April
usa.sort_values('page_diff').head(n=20)


Out[14]:
jan_rank jan_visits jan_pageviews apr_rank apr_visits apr_pageviews rank_diff page_diff visit_diff pct_page_diff pct_visit_diff
whitehouse.gov 9.0 18694420.0 59996465.0 22 5476312 11482288 -13.0 -48514177.0 -13218108.0 -0.808617 -0.707062
forecast.weather.gov 2.0 63880179.0 105332133.0 3 48962227 74347160 -1.0 -30984973.0 -14917952.0 -0.294164 -0.233530
weather.gov 6.0 22945944.0 52648454.0 7 18732342 43888460 -1.0 -8759994.0 -4213602.0 -0.166387 -0.183632
petitions.whitehouse.gov 19.0 6816365.0 13900601.0 32 3581382 6143266 -13.0 -7757335.0 -3234983.0 -0.558058 -0.474591
applicationmanager.gov 102.0 1020694.0 17780219.0 139 683903 10521264 -37.0 -7258955.0 -336791.0 -0.408260 -0.329963
opm.gov 26.0 4666028.0 15290743.0 50 2670359 8689047 -24.0 -6601696.0 -1995669.0 -0.431745 -0.427702
nsopw.gov 106.0 945329.0 12140191.0 102 970346 7828024 4.0 -4312167.0 25017.0 -0.355198 0.026464
wrh.noaa.gov 20.0 6020126.0 16849635.0 38 3065277 12539076 -18.0 -4310559.0 -2954849.0 -0.255825 -0.490828
login.usajobs.gov 14.0 11280875.0 16934829.0 17 8222942 12911960 -3.0 -4022869.0 -3057933.0 -0.237550 -0.271072
search.usa.gov 21.0 5708839.0 10841797.0 36 3441551 6994120 -15.0 -3847677.0 -2267288.0 -0.354893 -0.397154
postcalc.usps.com 57.0 2102006.0 9500806.0 115 848311 5711907 -58.0 -3788899.0 -1253695.0 -0.398798 -0.596428
tools.usps.com 1.0 136655792.0 224797512.0 1 130000962 221116043 0.0 -3681469.0 -6654830.0 -0.016377 -0.048698
dol.gov 43.0 2915448.0 9140368.0 52 2620358 6097503 -9.0 -3042865.0 -295090.0 -0.332904 -0.101216
store.usps.com 29.0 4182714.0 18048909.0 31 3587315 15411131 -2.0 -2637778.0 -595399.0 -0.146146 -0.142348
search.irs.gov 68.0 1794815.0 3323398.0 222 294917 789836 -154.0 -2533562.0 -1499898.0 -0.762341 -0.835684
nhlbi.nih.gov 56.0 2109379.0 3683835.0 113 862718 1426282 -57.0 -2257553.0 -1246661.0 -0.612827 -0.591009
pptform.state.gov 47.0 2446060.0 24542519.0 61 2166965 22375589 -14.0 -2166930.0 -279095.0 -0.088293 -0.114100
ceac.state.gov 18.0 7142397.0 110897616.0 20 6304284 108882162 -2.0 -2015454.0 -838113.0 -0.018174 -0.117343
search.whitehouse.gov 105.0 973181.0 1975853.0 853 19492 36441 -748.0 -1939412.0 -953689.0 -0.981557 -0.979971
onboarding.usastaffing.gov 277.0 154824.0 3387294.0 457 72882 1538994 -180.0 -1848300.0 -81942.0 -0.545657 -0.529259

In [15]:
# Largest relative increases in pageviews
usa.sort_values('pct_page_diff', ascending=False).head(n=20)


Out[15]:
jan_rank jan_visits jan_pageviews apr_rank apr_visits apr_pageviews rank_diff page_diff visit_diff pct_page_diff pct_visit_diff
surveys.nces.ed.gov 1532.0 3277.0 22937.0 275 191527 7600904 1257.0 7577967.0 188250.0 330.381785 57.445835
docline.gov 1236.0 5734.0 16384.0 271 196612 2805953 965.0 2789569.0 190878.0 170.261780 33.288804
usaisr.amedd.army.mil 1892.0 1638.0 1638.0 809 22034 59322 1083.0 57684.0 20396.0 35.216117 12.451770
reverseauctions.gsa.gov 1658.0 2458.0 5734.0 727 26271 186442 931.0 180708.0 23813.0 31.515173 9.687958
wwwdev.cdc.gov 1281.0 5734.0 7373.0 506 59322 205086 775.0 197713.0 53588.0 26.815814 9.345657
images.nasa.gov 1134.0 7373.0 41778.0 147 662717 1151703 987.0 1109925.0 655344.0 26.567212 88.884308
stellwagen.noaa.gov 1880.0 1638.0 2458.0 1148 9322 66950 732.0 64492.0 7684.0 26.237592 4.691087
oceandata.sci.gsfc.nasa.gov 1640.0 2458.0 16384.0 679 30509 377969 961.0 361585.0 28051.0 22.069397 11.412124
lta.cr.usgs.gov 1493.0 3277.0 3277.0 615 38136 62712 878.0 59435.0 34859.0 18.137016 10.637473
nbib.opm.gov 1200.0 6553.0 42597.0 185 419495 665259 1015.0 622662.0 412942.0 14.617508 63.015718
fnrs.nmfs.noaa.gov 1775.0 1638.0 9830.0 1175 8475 152544 600.0 142714.0 6837.0 14.518210 4.173993
cit.nih.gov 1437.0 3277.0 4915.0 594 40678 72882 843.0 67967.0 37401.0 13.828484 11.413183
pages.nist.gov 1648.0 2458.0 2458.0 850 19492 34746 798.0 32288.0 17034.0 13.135883 6.930024
espanol.womenshealth.gov 1465.0 3277.0 4096.0 529 50848 57628 936.0 53532.0 47571.0 13.069336 14.516631
software.nasa.gov 1412.0 4096.0 12288.0 603 39831 166950 809.0 154662.0 35735.0 12.586426 8.724365
sbir.nih.gov 1516.0 3277.0 4915.0 820 21187 64407 696.0 59492.0 17910.0 12.104171 5.465365
account.uspto.gov 1686.0 1638.0 2458.0 908 16102 31356 778.0 28898.0 14464.0 11.756713 8.830281
realmail.usps.com 1148.0 7373.0 28671.0 296 169493 362715 852.0 334044.0 162120.0 11.650936 21.988336
dnfsb.gov 1567.0 2458.0 2458.0 1441 4237 28814 126.0 26356.0 1779.0 10.722539 0.723759
fsapubs.gov 1778.0 1638.0 8192.0 1459 4237 86441 319.0 78249.0 2599.0 9.551880 1.586691

In [16]:
# Largest relative declines in pageviews
usa.sort_values('pct_page_diff').head(n=20)


Out[16]:
jan_rank jan_visits jan_pageviews apr_rank apr_visits apr_pageviews rank_diff page_diff visit_diff pct_page_diff pct_visit_diff
wcms.epa.gov 457.0 60619.0 978916.0 2461 847 4237 -2004.0 -974679.0 -59772.0 -0.995672 -0.986027
www1.va.gov 442.0 65534.0 162197.0 2474 847 847 -2032.0 -161350.0 -64687.0 -0.994778 -0.987075
nvd.nist.gov 509.0 45874.0 70449.0 2308 847 847 -1799.0 -69602.0 -45027.0 -0.987977 -0.981536
oea.gov 1641.0 2458.0 68811.0 2314 847 847 -673.0 -67964.0 -1611.0 -0.987691 -0.655411
trimewebmod.epa.gov 1534.0 3277.0 107312.0 2428 847 1695 -894.0 -105617.0 -2430.0 -0.984205 -0.741532
web.nvd.nist.gov 264.0 167931.0 517719.0 1236 7627 9322 -972.0 -508397.0 -160304.0 -0.981994 -0.954583
search.whitehouse.gov 105.0 973181.0 1975853.0 853 19492 36441 -748.0 -1939412.0 -953689.0 -0.981557 -0.979971
m.earthobservatory.nasa.gov 265.0 165474.0 225273.0 1473 4237 4237 -1208.0 -221036.0 -161237.0 -0.981192 -0.974395
useiti.doi.gov 1276.0 5734.0 44236.0 2437 847 847 -1161.0 -43389.0 -4887.0 -0.980853 -0.852285
cats.gsfc.nasa.gov 837.0 15564.0 40959.0 2082 847 847 -1245.0 -40112.0 -14717.0 -0.979321 -0.945580
disa.mil 305.0 137622.0 278520.0 1672 2542 5932 -1367.0 -272588.0 -135080.0 -0.978702 -0.981529
blstest.psb.bls.gov 1226.0 5734.0 34405.0 2073 847 847 -847.0 -33558.0 -4887.0 -0.975381 -0.852285
access.trade.gov 507.0 46693.0 374364.0 2034 847 9322 -1527.0 -365042.0 -45846.0 -0.975099 -0.981860
ssaikansascity.usajobs.gov 716.0 22118.0 83556.0 1984 1695 2542 -1268.0 -81014.0 -20423.0 -0.969577 -0.923366
pmi.gov 1103.0 8192.0 27033.0 2342 847 847 -1239.0 -26186.0 -7345.0 -0.968668 -0.896606
clinton.presidentiallibraries.us 1556.0 2458.0 24575.0 2096 847 847 -540.0 -23728.0 -1611.0 -0.965534 -0.655411
ampd.epa.gov 1170.0 6553.0 19660.0 2046 847 847 -876.0 -18813.0 -5706.0 -0.956918 -0.870746
etas.uspto.gov 1083.0 8192.0 400577.0 2165 847 18644 -1082.0 -381933.0 -7345.0 -0.953457 -0.896606
e-enterprise.gov 921.0 12288.0 16384.0 2137 847 847 -1216.0 -15537.0 -11441.0 -0.948303 -0.931071
www-red-dev.cancer.gov 1280.0 5734.0 30310.0 2473 847 1695 -1193.0 -28615.0 -4887.0 -0.944078 -0.852285

Visit differences


In [17]:
# Largest absolute increases in visits, January -> April
usa.sort_values('visit_diff', ascending=False).head(n=20)


Out[17]:
jan_rank jan_visits jan_pageviews apr_rank apr_visits apr_pageviews rank_diff page_diff visit_diff pct_page_diff pct_visit_diff
irs.gov 3.0 46463674.0 128178957.0 2 77180232 191648876 1.0 63469919.0 30716558.0 0.495166 0.661088
find.irs.gov 35.0 3740359.0 9482784.0 15 11440763 21933213 20.0 12450429.0 7700404.0 1.312951 2.058734
m.usps.com 17.0 7157142.0 18259437.0 13 12847553 32994312 4.0 14734875.0 5690411.0 0.806973 0.795068
medlineplus.gov 4.0 45056329.0 62221347.0 4 47886795 67108971 0.0 4887624.0 2830466.0 0.078552 0.062821
nps.gov 13.0 11908364.0 39871763.0 10 14445870 43750324 3.0 3878561.0 2537506.0 0.097276 0.213086
cdc.gov 7.0 22759991.0 43954537.0 6 25089169 51052073 1.0 7097536.0 2329178.0 0.161474 0.102337
sec.gov 50.0 2356770.0 20605558.0 26 4327151 34585849 24.0 13980291.0 1970381.0 0.678472 0.836051
spc.noaa.gov 94.0 1139474.0 6684478.0 39 3047480 18601833 55.0 11917355.0 1908006.0 1.782840 1.674462
reg.usps.com 15.0 10489551.0 19597971.0 14 12272125 19374720 1.0 -223251.0 1782574.0 -0.011392 0.169938
uspto.gov 92.0 1144389.0 2767996.0 42 2912733 7262766 50.0 4494770.0 1768344.0 1.623835 1.545230
fda.gov 22.0 5587601.0 11788765.0 18 7209375 14822144 4.0 3033379.0 1621774.0 0.257311 0.290245
egov.uscis.gov 24.0 5290240.0 23530017.0 19 6756830 29028181 5.0 5498164.0 1466590.0 0.233666 0.277226
nasa.gov 16.0 8252381.0 16743961.0 16 9709394 19054378 0.0 2310417.0 1457013.0 0.137985 0.176557
fs.usda.gov 99.0 1055918.0 3131711.0 59 2283915 7303444 40.0 4171733.0 1227997.0 1.332094 1.162966
consumerfinance.gov 74.0 1490081.0 3201341.0 49 2670359 4544101 25.0 1342760.0 1180278.0 0.419437 0.792090
waterdata.usgs.gov 67.0 1829220.0 4142574.0 45 2795783 6937339 22.0 2794765.0 966563.0 0.674645 0.528402
donotcall.gov 78.0 1394238.0 7182537.0 58 2292390 11531441 20.0 4348904.0 898152.0 0.605483 0.644188
uscis.gov 12.0 12684943.0 31217166.0 12 13564507 32458715 0.0 1241549.0 879564.0 0.039771 0.069339
apps.irs.gov 28.0 4462053.0 29567346.0 23 5252581 30289207 5.0 721861.0 790528.0 0.024414 0.177167
informeddelivery.usps.com 388.0 90929.0 287531.0 123 788141 1176280 265.0 888749.0 697212.0 3.090968 7.667653

In [18]:
# Largest absolute drops in visits, January -> April
usa.sort_values('visit_diff').head(n=20)


Out[18]:
jan_rank jan_visits jan_pageviews apr_rank apr_visits apr_pageviews rank_diff page_diff visit_diff pct_page_diff pct_visit_diff
forecast.weather.gov 2.0 63880179.0 105332133.0 3 48962227 74347160 -1.0 -30984973.0 -14917952.0 -0.294164 -0.233530
whitehouse.gov 9.0 18694420.0 59996465.0 22 5476312 11482288 -13.0 -48514177.0 -13218108.0 -0.808617 -0.707062
tools.usps.com 1.0 136655792.0 224797512.0 1 130000962 221116043 0.0 -3681469.0 -6654830.0 -0.016377 -0.048698
usajobs.gov 8.0 21969486.0 93979170.0 8 16577241 110669463 0.0 16690293.0 -5392245.0 0.177596 -0.245442
weather.gov 6.0 22945944.0 52648454.0 7 18732342 43888460 -1.0 -8759994.0 -4213602.0 -0.166387 -0.183632
petitions.whitehouse.gov 19.0 6816365.0 13900601.0 32 3581382 6143266 -13.0 -7757335.0 -3234983.0 -0.558058 -0.474591
login.usajobs.gov 14.0 11280875.0 16934829.0 17 8222942 12911960 -3.0 -4022869.0 -3057933.0 -0.237550 -0.271072
wrh.noaa.gov 20.0 6020126.0 16849635.0 38 3065277 12539076 -18.0 -4310559.0 -2954849.0 -0.255825 -0.490828
search.usa.gov 21.0 5708839.0 10841797.0 36 3441551 6994120 -15.0 -3847677.0 -2267288.0 -0.354893 -0.397154
opm.gov 26.0 4666028.0 15290743.0 50 2670359 8689047 -24.0 -6601696.0 -1995669.0 -0.431745 -0.427702
search.irs.gov 68.0 1794815.0 3323398.0 222 294917 789836 -154.0 -2533562.0 -1499898.0 -0.762341 -0.835684
postcalc.usps.com 57.0 2102006.0 9500806.0 115 848311 5711907 -58.0 -3788899.0 -1253695.0 -0.398798 -0.596428
nhlbi.nih.gov 56.0 2109379.0 3683835.0 113 862718 1426282 -57.0 -2257553.0 -1246661.0 -0.612827 -0.591009
search.whitehouse.gov 105.0 973181.0 1975853.0 853 19492 36441 -748.0 -1939412.0 -953689.0 -0.981557 -0.979971
usa.gov 36.0 3634685.0 5934932.0 48 2709342 4455965 -12.0 -1478967.0 -925343.0 -0.249197 -0.254587
usps.com 5.0 30359489.0 41651833.0 5 29474794 41907937 0.0 256104.0 -884695.0 0.006149 -0.029141
ceac.state.gov 18.0 7142397.0 110897616.0 20 6304284 108882162 -2.0 -2015454.0 -838113.0 -0.018174 -0.117343
mobile.weather.gov 41.0 3067815.0 4749584.0 56 2417815 3626298 -15.0 -1123286.0 -650000.0 -0.236502 -0.211877
store.usps.com 29.0 4182714.0 18048909.0 31 3587315 15411131 -2.0 -2637778.0 -595399.0 -0.146146 -0.142348
studentaid.ed.gov 31.0 4064752.0 8879870.0 34 3517823 7439038 -3.0 -1440832.0 -546929.0 -0.162258 -0.134554

In [19]:
# Largest relative increases in visits
usa.sort_values('pct_visit_diff', ascending=False).head(n=20)


Out[19]:
jan_rank jan_visits jan_pageviews apr_rank apr_visits apr_pageviews rank_diff page_diff visit_diff pct_page_diff pct_visit_diff
images.nasa.gov 1134.0 7373.0 41778.0 147 662717 1151703 987.0 1109925.0 655344.0 26.567212 88.884308
nbib.opm.gov 1200.0 6553.0 42597.0 185 419495 665259 1015.0 622662.0 412942.0 14.617508 63.015718
surveys.nces.ed.gov 1532.0 3277.0 22937.0 275 191527 7600904 1257.0 7577967.0 188250.0 330.381785 57.445835
docline.gov 1236.0 5734.0 16384.0 271 196612 2805953 965.0 2789569.0 190878.0 170.261780 33.288804
realmail.usps.com 1148.0 7373.0 28671.0 296 169493 362715 852.0 334044.0 162120.0 11.650936 21.988336
espanol.womenshealth.gov 1465.0 3277.0 4096.0 529 50848 57628 936.0 53532.0 47571.0 13.069336 14.516631
usaisr.amedd.army.mil 1892.0 1638.0 1638.0 809 22034 59322 1083.0 57684.0 20396.0 35.216117 12.451770
cit.nih.gov 1437.0 3277.0 4915.0 594 40678 72882 843.0 67967.0 37401.0 13.828484 11.413183
oceandata.sci.gsfc.nasa.gov 1640.0 2458.0 16384.0 679 30509 377969 961.0 361585.0 28051.0 22.069397 11.412124
patentsgazette.uspto.gov 1844.0 1638.0 6553.0 851 19492 55933 993.0 49380.0 17854.0 7.535480 10.899878
lta.cr.usgs.gov 1493.0 3277.0 3277.0 615 38136 62712 878.0 59435.0 34859.0 18.137016 10.637473
financialresearch.gov 1304.0 4915.0 9830.0 519 52543 60170 785.0 50340.0 47628.0 5.121058 9.690336
reverseauctions.gsa.gov 1658.0 2458.0 5734.0 727 26271 186442 931.0 180708.0 23813.0 31.515173 9.687958
wwwdev.cdc.gov 1281.0 5734.0 7373.0 506 59322 205086 775.0 197713.0 53588.0 26.815814 9.345657
account.uspto.gov 1686.0 1638.0 2458.0 908 16102 31356 778.0 28898.0 14464.0 11.756713 8.830281
software.nasa.gov 1412.0 4096.0 12288.0 603 39831 166950 809.0 154662.0 35735.0 12.586426 8.724365
afterdeployment.dcoe.mil 1351.0 4096.0 7373.0 638 35593 55933 713.0 48560.0 31497.0 6.586193 7.689697
informeddelivery.usps.com 388.0 90929.0 287531.0 123 788141 1176280 265.0 888749.0 697212.0 3.090968 7.667653
pdhealth.mil 1848.0 1638.0 2458.0 980 13559 22882 868.0 20424.0 11921.0 8.309194 7.277778
siouxfalls.va.gov 1875.0 1638.0 7373.0 988 13559 38136 887.0 30763.0 11921.0 4.172386 7.277778

In [20]:
# Largest relative declines in visits
usa.sort_values('pct_visit_diff').head(n=20)


Out[20]:
jan_rank jan_visits jan_pageviews apr_rank apr_visits apr_pageviews rank_diff page_diff visit_diff pct_page_diff pct_visit_diff
www1.va.gov 442.0 65534.0 162197.0 2474 847 847 -2032.0 -161350.0 -64687.0 -0.994778 -0.987075
wcms.epa.gov 457.0 60619.0 978916.0 2461 847 4237 -2004.0 -974679.0 -59772.0 -0.995672 -0.986027
access.trade.gov 507.0 46693.0 374364.0 2034 847 9322 -1527.0 -365042.0 -45846.0 -0.975099 -0.981860
nvd.nist.gov 509.0 45874.0 70449.0 2308 847 847 -1799.0 -69602.0 -45027.0 -0.987977 -0.981536
disa.mil 305.0 137622.0 278520.0 1672 2542 5932 -1367.0 -272588.0 -135080.0 -0.978702 -0.981529
search.whitehouse.gov 105.0 973181.0 1975853.0 853 19492 36441 -748.0 -1939412.0 -953689.0 -0.981557 -0.979971
m.earthobservatory.nasa.gov 265.0 165474.0 225273.0 1473 4237 4237 -1208.0 -221036.0 -161237.0 -0.981192 -0.974395
vehiclestd.fas.gsa.gov 734.0 21299.0 134345.0 2448 847 14407 -1714.0 -119938.0 -20452.0 -0.892761 -0.960233
jobview.doors.dol.gov 470.0 57342.0 66353.0 1725 2542 4237 -1255.0 -62116.0 -54800.0 -0.936145 -0.955669
web.nvd.nist.gov 264.0 167931.0 517719.0 1236 7627 9322 -972.0 -508397.0 -160304.0 -0.981994 -0.954583
cats.gsfc.nasa.gov 837.0 15564.0 40959.0 2082 847 847 -1245.0 -40112.0 -14717.0 -0.979321 -0.945580
dodig.mil 363.0 102397.0 137622.0 1251 6780 8475 -888.0 -129147.0 -95617.0 -0.938418 -0.933787
e-enterprise.gov 921.0 12288.0 16384.0 2137 847 847 -1216.0 -15537.0 -11441.0 -0.948303 -0.931071
voyager.gsfc.nasa.gov 701.0 22937.0 22937.0 2009 1695 3390 -1308.0 -19547.0 -21242.0 -0.852204 -0.926102
ssaikansascity.usajobs.gov 716.0 22118.0 83556.0 1984 1695 2542 -1268.0 -81014.0 -20423.0 -0.969577 -0.923366
natice.noaa.gov 544.0 40140.0 57342.0 1584 3390 16102 -1040.0 -41240.0 -36750.0 -0.719194 -0.915546
search.www.uscg.mil 1021.0 9830.0 13107.0 2374 847 847 -1353.0 -12260.0 -8983.0 -0.935378 -0.913835
eagnmncom1179.usps.com 1003.0 9830.0 14745.0 2139 847 1695 -1136.0 -13050.0 -8983.0 -0.885046 -0.913835
federalreporter.nih.gov 772.0 18841.0 58162.0 1882 1695 43221 -1110.0 -14941.0 -17146.0 -0.256886 -0.910037
etas.uspto.gov 1083.0 8192.0 400577.0 2165 847 18644 -1082.0 -381933.0 -7345.0 -0.953457 -0.896606

Dials


In [21]:
# Show every computed metric for a single domain of interest
search = 'search.stopbullying.gov'
usa.loc[search]


Out[21]:
jan_rank           1520.000000
jan_visits         3277.000000
jan_pageviews      4915.000000
apr_rank           1145.000000
apr_visits         9322.000000
apr_pageviews     11864.000000
rank_diff           375.000000
page_diff          6949.000000
visit_diff         6045.000000
pct_page_diff         1.413835
pct_visit_diff        1.844675
Name: search.stopbullying.gov, dtype: float64

Plot these


In [22]:
# Scatter plot of each domain's January rank against its April rank, coloured by rank change

In [29]:
# taken from http://stackoverflow.com/questions/7404116/defining-the-midpoint-of-a-colormap-in-matplotlib

from mpl_toolkits.axes_grid1 import AxesGrid

def shiftedColorMap(cmap, start=0, midpoint=0.5, stop=1.0, name='shiftedcmap'):
    '''
    Function to offset the "center" of a colormap. Useful for
    data with a negative min and positive max and you want the
    middle of the colormap's dynamic range to be at zero

    Input
    -----
      cmap : The matplotlib colormap to be altered
      start : Offset from lowest point in the colormap's range.
          Defaults to 0.0 (no lower offset). Should be between
          0.0 and `midpoint`.
      midpoint : The new center of the colormap. Defaults to
          0.5 (no shift). Should be between 0.0 and 1.0. In
          general, this should be  1 - vmax/(vmax + abs(vmin))
          For example if your data range from -15.0 to +5.0 and
          you want the center of the colormap at 0.0, `midpoint`
          should be set to  1 - 5/(5 + 15)) or 0.75
      stop : Offset from highest point in the colormap's range.
          Defaults to 1.0 (no upper offset). Should be between
          `midpoint` and 1.0.
      name : Name given to the new colormap and used to register it.

    Output
    ------
      The new LinearSegmentedColormap. It is also registered with
      matplotlib under `name`, replacing any colormap previously
      registered under that name by this function.
    '''
    # Imported locally so the function works regardless of which notebook
    # cells have already run (the module-level imports live in later cells).
    import numpy as np
    import matplotlib
    import matplotlib.colors

    cdict = {
        'red': [],
        'green': [],
        'blue': [],
        'alpha': []
    }

    # regular index to compute the colors
    reg_index = np.linspace(start, stop, 257)

    # shifted index to match the data: the lower 128 samples are squeezed
    # into [0, midpoint) and the upper 129 into [midpoint, 1]
    shift_index = np.hstack([
        np.linspace(0.0, midpoint, 128, endpoint=False),
        np.linspace(midpoint, 1.0, 129, endpoint=True)
    ])

    for ri, si in zip(reg_index, shift_index):
        r, g, b, a = cmap(ri)

        cdict['red'].append((si, r, r))
        cdict['green'].append((si, g, g))
        cdict['blue'].append((si, b, b))
        cdict['alpha'].append((si, a, a))

    newcmap = matplotlib.colors.LinearSegmentedColormap(name, cdict)

    # Prefer the colormap registry: plt.register_cmap is deprecated/removed in
    # recent matplotlib. force=True lets the cell be re-run with the same name.
    registry = getattr(matplotlib, 'colormaps', None)
    if registry is not None and hasattr(registry, 'register'):
        registry.register(newcmap, name=name, force=True)
    else:
        # Older matplotlib without the registry API
        import matplotlib.pyplot as plt
        plt.register_cmap(cmap=newcmap)

    return newcmap

In [55]:
import matplotlib
# Build a shifted copy of the PuOr colormap, registered as 'shifted', for the
# rank-change scatter plot.
# NOTE(review): midpoint=0 places the colormap's centre at the very bottom of
# the range; shiftedColorMap's docstring suggests 1 - vmax/(vmax + abs(vmin))
# when the centre should sit at zero -- confirm this choice is intentional.
orig_cmap = matplotlib.cm.PuOr
shifted_cmap = shiftedColorMap(orig_cmap, midpoint=0, name='shifted')

In [60]:
import numpy as np
import matplotlib.pyplot as plt

# One point per domain: January rank on x, April rank on y, coloured by rank change
fig, ax = plt.subplots(figsize=(10, 10))
x = usa['jan_rank']
y = usa['apr_rank']
ax.scatter(x, y, s=25, c=usa['rank_diff'], cmap=shifted_cmap, alpha=.3, zorder=0)

# Restrict both axes to the first 2000 ranks, then take their common extent
ax.set_xlim(0, 2000)
ax.set_ylim(0, 2000)
axis_extents = [ax.get_xlim(), ax.get_ylim()]
lims = [np.min(axis_extents), np.max(axis_extents)]

# Diagonal reference line: domains on it kept the same rank in both months
ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
ax.set_aspect('equal')
ax.set_xlim(lims)
ax.set_ylim(lims)

ax.set_xlabel('Rank of domain in January', fontsize=14)
ax.set_ylabel('Rank of domain in April', fontsize=14)
ax.set_title('How pages changed in popularity over the first 100 days', fontsize=18)
# Put rank 1 at the top of the y axis
ax.invert_yaxis()

# Region labels (no arrow is drawn, so only the text positions matter visually)
ax.annotate('Became More Popular', xy=(1450, 250), xytext=(1250, 200), fontsize=16, style='italic')
ax.annotate('Became Less Popular', xy=(1450, 250), xytext=(100, 1800), fontsize=16, style='italic')
# plt.savefig('../austinbrian.github.io/assets/pagerank.png')


Out[60]:
<matplotlib.text.Annotation at 0x1134cb150>